Chapter 5 Community composition
5.1 Taxonomy overview
5.1.1 Stacked barplot
# Stacked barplot of per-sample relative abundances, coloured by phylum.
genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # Long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # drop zero counts
  # group = phylum keeps the stacking order of phyla identical across bars
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
  # Stacked bars with thin white borders between segments
  geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
  scale_fill_manual(values = phylum_colors) +
  # Nested facets: region > environment > treatment
  facet_nested(. ~ region + environment + treatment, scales = "free") +
  guides(fill = guide_legend(ncol = 1)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")
  ) +
  labs(fill = "Phylum", y = "Relative abundance", x = "Samples")
Number of bacterial phyla
[1] 13
5.1.2 Phylum relative abundances
# Relative abundance of each phylum within each sample.
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum, region, environment, treatment) %>%
  # "drop_last" is summarise()'s default here; stating it silences the message
  summarise(relabun = sum(count), .groups = "drop_last")

# Mean ± SD of each phylum's relative abundance (in %), most abundant first.
phylum_summary %>%
  group_by(phylum) %>%
  summarise(
    total_mean = mean(relabun * 100, na.rm = TRUE),
    total_sd = sd(relabun * 100, na.rm = TRUE)
  ) %>%
  mutate(total = str_c(round(total_mean, 2), "±", round(total_sd, 2))) %>%
  arrange(-total_mean) %>%
  dplyr::select(phylum, total) %>%
  tt()
| phylum | total |
|---|---|
| p__Bacteroidota | 55.63±15.89 |
| p__Bacillota_A | 18.37±6.29 |
| p__Pseudomonadota | 10.83±12.9 |
| p__Bacillota | 5.01±8.39 |
| p__Verrucomicrobiota | 4.83±4.78 |
| p__Desulfobacterota | 1.93±1.73 |
| p__Fusobacteriota | 1.22±2.09 |
| p__Deferribacterota | 0.78±1.08 |
| p__Bacillota_C | 0.59±0.89 |
| p__Cyanobacteriota | 0.45±0.56 |
| p__Bacillota_B | 0.17±0.15 |
| p__Elusimicrobiota | 0.15±0.42 |
| p__Chlamydiota | 0.05±0.1 |
# Phyla ordered by decreasing mean relative abundance.
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(-mean) %>%
  # pull() by name avoids a bare select(), which other attached packages can mask
  pull(phylum)

# Jitter plot of per-sample relative abundance, one row per phylum.
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  # Reversed levels place the most abundant phylum at the top of the y axis
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  # Assumes phylum_colors is a vector named by phylum -- TODO confirm
  scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
  geom_jitter(alpha = 0.5) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(y = "Phylum", x = "Relative abundance")
5.2 Taxonomy boxplot
5.2.1 Family
# Relative abundance of each family within each sample.
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # Long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # "drop_last" is summarise()'s default here; stating it silences the message
  summarise(relabun = sum(count), .groups = "drop_last")

# Mean and SD of each family's relative abundance, most abundant first.
family_summary %>%
  group_by(family) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(-mean) %>%
  tt()
| family | mean | sd |
|---|---|---|
| f__Bacteroidaceae | 2.583672e-01 | 1.402038e-01 |
| f__Rikenellaceae | 1.497520e-01 | 7.690553e-02 |
| f__Tannerellaceae | 7.391528e-02 | 4.445621e-02 |
| f__Ruminococcaceae | 6.634698e-02 | 4.600300e-02 |
| f__Lachnospiraceae | 4.737236e-02 | 3.212973e-02 |
| f__Akkermansiaceae | 4.592234e-02 | 4.643987e-02 |
| f__Enterobacteriaceae | 3.966049e-02 | 8.976101e-02 |
| f__Marinifilaceae | 3.948347e-02 | 3.440221e-02 |
| f__Aeromonadaceae | 2.888392e-02 | 4.894755e-02 |
| f__Mycoplasmoidaceae | 2.767279e-02 | 8.415652e-02 |
| f__ | 2.201216e-02 | 1.936504e-02 |
| f__Erysipelotrichaceae | 2.124871e-02 | 1.736934e-02 |
| f__Desulfovibrionaceae | 1.925392e-02 | 1.729129e-02 |
| f__Moraxellaceae | 1.291154e-02 | 2.575427e-02 |
| f__Oscillospiraceae | 1.270564e-02 | 8.459566e-03 |
| f__Clostridiaceae | 1.236215e-02 | 1.829537e-02 |
| f__Fusobacteriaceae | 1.223384e-02 | 2.090091e-02 |
| f__Cellulosilyticaceae | 1.113335e-02 | 1.974877e-02 |
| f__CAG-239 | 9.360822e-03 | 1.360659e-02 |
| f__Butyricicoccaceae | 8.930175e-03 | 2.594866e-02 |
| f__Mucispirillaceae | 7.841353e-03 | 1.081069e-02 |
| f__CHK158-818 | 7.232655e-03 | 8.140593e-03 |
| f__Anaerovoracaceae | 6.035251e-03 | 8.674362e-03 |
| f__Muribaculaceae | 6.002582e-03 | 6.747350e-03 |
| f__Peptostreptococcaceae | 5.317766e-03 | 1.569151e-02 |
| f__P3 | 5.228558e-03 | 8.382358e-03 |
| f__UBA3637 | 4.852446e-03 | 9.697163e-03 |
| f__Gastranaerophilaceae | 4.273520e-03 | 5.543289e-03 |
| f__Pumilibacteraceae | 2.713899e-03 | 3.103888e-03 |
| f__UBA932 | 2.573314e-03 | 3.835337e-03 |
| f__Anaerotignaceae | 2.540732e-03 | 2.377879e-03 |
| f__Acutalibacteraceae | 2.530090e-03 | 3.199571e-03 |
| f__UBA3830 | 2.212427e-03 | 3.375679e-03 |
| f__Chromobacteriaceae | 1.858083e-03 | 8.490568e-03 |
| f__Succinispiraceae | 1.777645e-03 | 2.030364e-03 |
| f__Massilibacillaceae | 1.723616e-03 | 3.670656e-03 |
| f__Pseudomonadaceae | 1.506298e-03 | 2.753367e-03 |
| f__Elusimicrobiaceae | 1.488813e-03 | 4.220278e-03 |
| f__UBA1997 | 1.435499e-03 | 4.204248e-03 |
| f__Peptococcaceae | 1.433042e-03 | 1.313017e-03 |
| f__Chitinibacteraceae | 1.330971e-03 | 3.241530e-03 |
| f__Coprobacteraceae | 9.066416e-04 | 1.450932e-03 |
| f__Sedimentibacteraceae | 8.685113e-04 | 1.168186e-03 |
| f__CAG-508 | 8.538769e-04 | 4.698702e-03 |
| f__Burkholderiaceae_A | 8.306559e-04 | 2.376298e-03 |
| f__Shewanellaceae | 7.349097e-04 | 2.541624e-03 |
| f__Coprobacillaceae | 6.481948e-04 | 1.468116e-03 |
| f__UBA1820 | 6.453674e-04 | 1.039951e-03 |
| f__Xanthobacteraceae | 5.704160e-04 | 2.271298e-03 |
| f__Chlamydiaceae | 4.705475e-04 | 1.047759e-03 |
| f__GCF-1484045 | 3.968194e-04 | 2.347615e-03 |
| f__CALVMC01 | 3.395805e-04 | 1.778833e-03 |
| f__Borkfalkiaceae | 3.253103e-04 | 5.978903e-04 |
| f__UBA7702 | 2.752987e-04 | 6.465242e-04 |
| f__Eubacteriaceae | 2.752223e-04 | 4.951364e-04 |
| f__UBA3700 | 1.468068e-04 | 8.685208e-04 |
| f__CALYAR01 | 1.465184e-04 | 2.571231e-04 |
| f__Enterococcaceae | 9.119125e-05 | 5.394947e-04 |
| f__UBA660 | 3.641870e-05 | 9.107289e-05 |
# Families ordered by decreasing summed relative abundance across samples.
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total = sum(relabun)) %>%
  arrange(-total) %>%
  # pull() by name avoids a bare select(), which other attached packages can mask
  pull(family)

# Jitter plot of the 20 most abundant families, faceted by environment.
family_summary %>%
  # Attach the phylum of each family for colouring.
  # NOTE(review): a family label shared by several phyla (e.g. the unclassified
  # "f__") yields one joined row per phylum -- confirm this is intended.
  left_join(
    genome_metadata %>% dplyr::select(family, phylum) %>% distinct(),
    by = join_by(family)
  ) %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # head() does not introduce NA entries when fewer than 20 families exist
  filter(family %in% head(family_arrange, 20)) %>%
  # Reversed levels place the most abundant family at the top of the y axis
  mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
  # NOTE(review): drops the 8th palette entry by position, presumably a phylum
  # absent from these families -- confirm; breaks silently if the order changes.
  scale_color_manual(values = phylum_colors[-8]) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ environment) +
  theme_minimal() +
  labs(y = "Family", x = "Relative abundance", color = "Phylum")
5.2.2 Genus
# Relative abundance of each genus within each sample.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # Long format: one row per genome-sample pair
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # "drop_last" is summarise()'s default here; stating it silences the message
  summarise(relabun = sum(count), .groups = "drop_last") %>%
  filter(genus != "g__") %>% # drop entries whose genus is the bare "g__" prefix
  mutate(genus = sub("^g__", "", genus)) # strip the "g__" prefix

# Mean and SD of each genus' relative abundance, most abundant first.
genus_summary_sort <- genus_summary %>%
  group_by(genus) %>%
  summarise(
    mean = mean(relabun, na.rm = TRUE),
    sd = sd(relabun, na.rm = TRUE)
  ) %>%
  arrange(-mean)

genus_summary_sort %>%
  tt()
| genus | mean | sd |
|---|---|---|
| Bacteroides | 2.517446e-01 | 1.390618e-01 |
| Mucinivorans | 6.923418e-02 | 4.752036e-02 |
| Parabacteroides | 6.030294e-02 | 3.859090e-02 |
| Aeromonas | 2.888392e-02 | 4.894755e-02 |
| Odoribacter | 2.459781e-02 | 2.122265e-02 |
| Akkermansia | 2.443440e-02 | 3.399107e-02 |
| Mycoplasma_L | 2.305445e-02 | 8.469675e-02 |
| JADFUS01 | 2.278804e-02 | 1.254175e-02 |
| UBA866 | 2.223665e-02 | 2.645409e-02 |
| Hafnia | 1.703127e-02 | 8.653435e-02 |
| Alistipes | 1.610525e-02 | 1.296819e-02 |
| Plesiomonas | 1.419193e-02 | 3.093735e-02 |
| Parabacteroides_B | 1.361234e-02 | 1.262429e-02 |
| Acinetobacter | 1.291154e-02 | 2.575427e-02 |
| Dielma | 1.165436e-02 | 1.430759e-02 |
| Cetobacterium | 1.110575e-02 | 2.029156e-02 |
| Clostridium | 1.069384e-02 | 1.723953e-02 |
| Bilophila | 1.023454e-02 | 1.247143e-02 |
| JAIHAL01 | 9.803405e-03 | 1.823757e-02 |
| CAJGBR01 | 9.642751e-03 | 8.330717e-03 |
| 14-2 | 8.887441e-03 | 1.959989e-02 |
| Angelakisella | 8.653329e-03 | 7.804514e-03 |
| Gallibacteroides | 7.232655e-03 | 8.140593e-03 |
| Clostridium_Q | 6.859035e-03 | 8.336175e-03 |
| Hydrogenoanaerobacterium | 6.381847e-03 | 6.231932e-03 |
| HGM05232 | 6.002582e-03 | 6.747350e-03 |
| Bacteroides_G | 5.561445e-03 | 6.407073e-03 |
| Buttiauxella | 4.894942e-03 | 1.436544e-02 |
| SZUA-378 | 4.878108e-03 | 1.470108e-02 |
| Malacoplasma | 4.618338e-03 | 1.047538e-02 |
| Hungatella_A | 4.389518e-03 | 5.344594e-03 |
| Anaerotruncus | 4.275112e-03 | 4.410571e-03 |
| Alistipes_A | 4.186772e-03 | 3.802029e-03 |
| Pseudoflavonifractor | 3.942622e-03 | 3.930292e-03 |
| Intestinimonas | 3.873406e-03 | 3.447851e-03 |
| Tidjanibacter | 3.806538e-03 | 3.328943e-03 |
| Anaerovorax | 3.358325e-03 | 7.507482e-03 |
| Avirikenella | 3.116317e-03 | 4.004750e-03 |
| RGIG3102 | 3.041666e-03 | 5.273174e-03 |
| Gallalistipes | 3.020969e-03 | 2.579968e-03 |
| Anaerorhabdus | 2.979294e-03 | 4.502520e-03 |
| Paraclostridium | 2.925988e-03 | 1.536966e-02 |
| UMGS1251 | 2.753843e-03 | 4.370592e-03 |
| Egerieousia | 2.573314e-03 | 3.835337e-03 |
| JAGAJR01 | 2.390996e-03 | 5.196731e-03 |
| Mobilisporobacter | 2.341649e-03 | 4.066854e-03 |
| UMGS1202 | 2.027421e-03 | 1.987383e-03 |
| JAAYQI01 | 1.902859e-03 | 2.200810e-03 |
| Craterilacuibacter | 1.858083e-03 | 8.490568e-03 |
| Copranaerobaculum | 1.829992e-03 | 8.237395e-03 |
| Amedibacillus | 1.694844e-03 | 2.379494e-03 |
| Butyricimonas | 1.692717e-03 | 2.006344e-03 |
| JAHHTP01 | 1.680060e-03 | 2.017330e-03 |
| Sarcina | 1.668308e-03 | 3.171415e-03 |
| Negativibacillus | 1.561698e-03 | 1.820190e-03 |
| Intestinibacillus | 1.537995e-03 | 1.677494e-03 |
| Rikenella | 1.513511e-03 | 2.463151e-03 |
| Pseudomonas_E | 1.506298e-03 | 2.753367e-03 |
| Ruthenibacterium | 1.471374e-03 | 2.597706e-03 |
| Evtepia | 1.454607e-03 | 1.692671e-03 |
| Romboutsia_D | 1.377166e-03 | 3.495412e-03 |
| Deefgea | 1.330971e-03 | 3.241530e-03 |
| Phocea | 1.290108e-03 | 2.335155e-03 |
| Spyradomonas | 1.283614e-03 | 2.009466e-03 |
| Budvicia | 1.267490e-03 | 6.379230e-03 |
| JAGNZR01 | 1.128086e-03 | 3.938589e-03 |
| UBA7488 | 1.112285e-03 | 2.162473e-03 |
| RGIG4140 | 1.069355e-03 | 6.021280e-03 |
| Aminipila | 1.067588e-03 | 2.200351e-03 |
| WRKB01 | 1.061160e-03 | 2.721916e-03 |
| Romboutsia_A | 1.014612e-03 | 1.751435e-03 |
| Serratia_A | 9.262594e-04 | 3.342514e-03 |
| CAKVBE01 | 9.105039e-04 | 3.028369e-03 |
| Coprobacter | 9.066416e-04 | 1.450932e-03 |
| RGIG7389 | 9.031113e-04 | 1.056226e-03 |
| RGIG8482 | 8.538769e-04 | 4.698702e-03 |
| JAEZVV01 | 8.306559e-04 | 2.376298e-03 |
| Massiliimalia | 8.201689e-04 | 1.353876e-03 |
| JAJBUQ01 | 8.132764e-04 | 1.303446e-03 |
| Robinsoniella | 7.893139e-04 | 1.692794e-03 |
| Bacilliculturomica | 7.505217e-04 | 1.159335e-03 |
| MGBC133411 | 7.434775e-04 | 1.077399e-03 |
| Shewanella | 7.349097e-04 | 2.541624e-03 |
| Coprobacillus | 6.481948e-04 | 1.468116e-03 |
| IOR16 | 6.442482e-04 | 9.524998e-04 |
| Kluyvera | 6.393609e-04 | 2.904551e-03 |
| UBA1174 | 5.845069e-04 | 3.314194e-03 |
| Bradyrhizobium | 5.704160e-04 | 2.271298e-03 |
| HGM16780 | 5.648527e-04 | 2.422257e-03 |
| Amedibacterium | 5.449099e-04 | 2.608749e-03 |
| Fimivivens | 5.043338e-04 | 6.426073e-04 |
| Anaerotignum | 4.555793e-04 | 9.568867e-04 |
| Citrobacter | 4.246318e-04 | 1.467049e-03 |
| Muricomes | 4.103528e-04 | 6.526275e-04 |
| 51-20 | 3.322612e-04 | 1.965684e-03 |
| UBA1794 | 3.101507e-04 | 5.552653e-04 |
| JAGPHI01 | 2.969901e-04 | 6.792123e-04 |
| Yersinia | 2.846062e-04 | 1.069499e-03 |
| Cryptoclostridium | 2.752987e-04 | 6.465242e-04 |
| Longicatena | 2.747360e-04 | 1.625360e-03 |
| Massilioclostridium | 2.709713e-04 | 6.165792e-04 |
| Dysosmobacter | 2.360930e-04 | 4.680885e-04 |
| CALXSC01 | 2.222364e-04 | 7.261421e-04 |
| Hespellia | 2.221637e-04 | 4.563625e-04 |
| Scatenecus | 1.939464e-04 | 1.011852e-03 |
| SIG603 | 1.919687e-04 | 3.431537e-04 |
| Faecalimonas | 1.919022e-04 | 4.208517e-04 |
| CAZU01 | 1.657047e-04 | 9.803223e-04 |
| Lactonifactor | 1.237770e-04 | 4.738322e-04 |
| Enterococcus | 9.119125e-05 | 5.394947e-04 |
| MGBC107952 | 3.641870e-05 | 9.107289e-05 |
# Genera ordered by decreasing summed relative abundance across samples.
# genus_summary already excludes "g__" entries and has the "g__" prefix
# stripped, so neither step is repeated here.
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>%
  arrange(-total) %>%
  # pull() by name avoids a bare select(), which other attached packages can mask
  pull(genus)

# Jitter plot of per-sample genus relative abundance, faceted by environment.
genus_summary %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  # Reversed levels place the genus with the highest mean abundance at the top
  mutate(genus = factor(genus, levels = rev(pull(genus_summary_sort, genus)))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors) +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ environment) +
  theme_minimal() +
  # The y axis shows genera; the label previously read "Family"
  labs(y = "Genus", x = "Relative abundance", color = "Phylum")